# load data.table for faster operations
library(data.table)
Registered S3 method overwritten by 'data.table':
method from
print.data.table
data.table 1.12.8 using 2 threads (see ?getDTthreads). Latest news: r-datatable.com
Attaching package: 㤼㸱data.table㤼㸲
The following objects are masked from 㤼㸱package:dplyr㤼㸲:
between, first, last
The following object is masked from 㤼㸱package:purrr㤼㸲:
transpose
The following object is masked from 㤼㸱package:plm㤼㸲:
between
library(tidyverse)
# use fread for fast reading of data csv files
# ColumnNameCleaner.rmd should be run first
cases <- fread("Data/time_series_covid19_confirmed_US.csv")
vaccineCounty <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_County.csv")
#vaccineJurisdiction <- fread("Data/Vaccination/COVID-19_Vaccinations_in_the_United_States_Jurisdiction.csv")
pop <- fread("Data/Population/co-est2019-alldata.csv")
distancing <- fread("Data/Distancing/Unacast_Social_Distancing_Grades.csv")
masking <- fread("Data/Masking/mask-use-by-county.csv")
# preview the data tables
head(cases)
head(vaccineCounty)
#head(vaccineJurisdiction)
head(distancing)
head(masking)
head(pop)
# either create or fix the FIPS column using available data
cases[, FIPS := sprintf("%05d", cases[,FIPS])]
pop[, FIPS := sprintf("%02d%03d", pop[,STATE], pop[,COUNTY])]
# create a population density column too
distancing[, c("FIPS", "popDensity") := .(sprintf("%05d", county_fips), county_population / Shape__Area)]
masking[, FIPS := sprintf("%05d", COUNTYFP)]
# drops unneeded columns
cases <- cases[, !c("V1", "UID", "iso2", "iso3", "code3", "CombinedKey")]
# selects the most recent population estimate (2019) and drops what will be redundant columns
years2019 <- grep("^([^0-9]*)$|2019", colnames(pop))
pop <- pop[, ..years2019][, !c("SUMLEV", "STATE", "COUNTY", "STNAME", "CTYNAME")]
# drops what will be redundant columns
distancing <- distancing[, !c("OBJECTID", "state_fips", "state_name", "county_fips", "county_name")]
masking <- masking[, !c("COUNTYFP")]
totalCases <- grep("[0-9]{4}", colnames(cases))
cases[, TotalCases := rowSums(cases[, ..totalCases], na.rm = TRUE)]
Both 'totalCases' and '..totalCases' exist in calling scope. Please remove the '..totalCases' variable in calling scope for clarity.
grades <- c("A+"=4.3,"A"=4,"A-"=3.7,"B+"=3.3,"B"=3,"B-"=2.7, "C+"=2.3,"C"=2,"C-"=1.7,"D+"=1.3,"D"=1,"D-"= 0.7, "F"=0)
gfunc <- function(x) {
grades[as.character(x)]
}
distancing[, c("grade_total", "grade_distance", "grade_visitation", "grade_encounters") :=
.(gfunc(grade_total), gfunc(grade_distance), gfunc(grade_visitation), gfunc(grade_encounters))]
# set the FIPS column as the key for faster data.table operations
# time series
setkey(cases, FIPS)
setkey(vaccineCounty, FIPS)
setkey(distancing, FIPS)
# not time series
setkey(masking, FIPS)
setkey(pop, FIPS)
pivot_wider(vaccineCounty, names_from = Date, values_from = )
csv <- cases[pop,][distancing,][masking,]
write.csv(csv, "Data/casesPopDistancingMasking.csv", row.names = FALSE)
openxlsx::write.xlsx(csv, "Data/casesPopDistancingMasking.xlsx", overwrite = TRUE)
dates <- grep("D[0-9]{4}_[0-9]{2}_[0-9]{2}", colnames(csv))
openxlsx::write.xlsx(csv[, !..dates], "Data/noDates.xlsx", overwrite = TRUE)
csv
LS0tDQp0aXRsZTogIlIgTm90ZWJvb2siDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQpgYGB7cn0NCiMgbG9hZCBkYXRhLnRhYmxlIGZvciBmYXN0ZXIgb3BlcmF0aW9ucw0KbGlicmFyeShkYXRhLnRhYmxlKQ0KbGlicmFyeSh0aWR5dmVyc2UpDQpgYGANCg0KDQpgYGB7cn0NCiMgdXNlIGZyZWFkIGZvciBmYXN0IHJlYWRpbmcgb2YgZGF0YSBjc3YgZmlsZXMNCiAgIyBDb2x1bW5OYW1lQ2xlYW5lci5ybWQgc2hvdWxkIGJlIHJ1biBmaXJzdA0KY2FzZXMgPC0gZnJlYWQoIkRhdGEvdGltZV9zZXJpZXNfY292aWQxOV9jb25maXJtZWRfVVMuY3N2IikNCnZhY2NpbmVDb3VudHkgPC0gZnJlYWQoIkRhdGEvVmFjY2luYXRpb24vQ09WSUQtMTlfVmFjY2luYXRpb25zX2luX3RoZV9Vbml0ZWRfU3RhdGVzX0NvdW50eS5jc3YiKQ0KI3ZhY2NpbmVKdXJpc2RpY3Rpb24gPC0gZnJlYWQoIkRhdGEvVmFjY2luYXRpb24vQ09WSUQtMTlfVmFjY2luYXRpb25zX2luX3RoZV9Vbml0ZWRfU3RhdGVzX0p1cmlzZGljdGlvbi5jc3YiKQ0KcG9wIDwtIGZyZWFkKCJEYXRhL1BvcHVsYXRpb24vY28tZXN0MjAxOS1hbGxkYXRhLmNzdiIpDQpkaXN0YW5jaW5nIDwtIGZyZWFkKCJEYXRhL0Rpc3RhbmNpbmcvVW5hY2FzdF9Tb2NpYWxfRGlzdGFuY2luZ19HcmFkZXMuY3N2IikNCm1hc2tpbmcgPC0gZnJlYWQoIkRhdGEvTWFza2luZy9tYXNrLXVzZS1ieS1jb3VudHkuY3N2IikNCmBgYA0KDQoNCmBgYHtyfQ0KIyBwcmV2aWV3IHRoZSBkYXRhIHRhYmxlcw0KaGVhZChjYXNlcykNCmhlYWQodmFjY2luZUNvdW50eSkNCiNoZWFkKHZhY2NpbmVKdXJpc2RpY3Rpb24pDQpoZWFkKGRpc3RhbmNpbmcpDQpoZWFkKG1hc2tpbmcpDQpoZWFkKHBvcCkNCmBgYA0KDQoNCmBgYHtyfQ0KIyBlaXRoZXIgY3JlYXRlIG9yIGZpeCB0aGUgRklQUyBjb2x1bW4gdXNpbmcgYXZhaWxhYmxlIGRhdGENCmNhc2VzWywgRklQUyA6PSBzcHJpbnRmKCIlMDVkIiwgY2FzZXNbLEZJUFNdKV0NCnBvcFssIEZJUFMgOj0gc3ByaW50ZigiJTAyZCUwM2QiLCBwb3BbLFNUQVRFXSwgcG9wWyxDT1VOVFldKV0NCiMgY3JlYXRlIGEgcG9wdWxhdGlvbiBkZW5zaXR5IGNvbHVtbiB0b28NCmRpc3RhbmNpbmdbLCBjKCJGSVBTIiwgInBvcERlbnNpdHkiKSA6PSAuKHNwcmludGYoIiUwNWQiLCBjb3VudHlfZmlwcyksIGNvdW50eV9wb3B1bGF0aW9uIC8gU2hhcGVfX0FyZWEpXQ0KbWFza2luZ1ssIEZJUFMgOj0gc3ByaW50ZigiJTA1ZCIsIENPVU5UWUZQKV0NCmBgYA0KDQoNCmBgYHtyfQ0KIyBkcm9wcyB1bm5lZWRlZCBjb2x1bW5zDQpjYXNlcyA8LSBjYXNlc1ssICFjKCJWMSIsICJVSUQiLCAiaXNvMiIsICJpc28zIiwgImNvZGUzIiwgIkNvbWJpbmVkS2V5IildDQojIHNlbGVjdHMgdGhlIG1vc3QgcmVjZW50IHBvcHVsYXRpb24gZXN0aW1hdGUgKDIwMTkpIGFuZCBkcm9wcyB3aGF0IHdpbGwgYmUgcmVkdW5kYW50IGNvbHVtbnMNCnllYXJzMjAxOSA8LSBncmVwKCJeKFteMC05XSopJHwyMDE5IiwgY29sbmFtZXMocG9wKSkNCnBvcCA8LSBwb3BbLCAuLnllYXJzMjAxOV1bLCAhYygiU1VNTEVWIiwgIlNUQVRFIiwgIkNPVU5UWSIsICJTVE5BTUUiLCAiQ1RZTkFNRSIpXQ0KIyBkcm9wcyB3aGF0IHdpbGwgYmUgcmVkdW5kYW50IGNvbHVtbnMNCmRpc3RhbmNpbmcgPC0gZGlzdGFuY2luZ1ssICFjKCJPQkpFQ1RJRCIsICJzdGF0ZV9maXBzIiwgInN0YXRlX25hbWUiLCAiY291bnR5X2ZpcHMiLCAiY291bnR5X25hbWUiKV0NCm1hc2tpbmcgPC0gbWFza2luZ1ssICFjKCJDT1VOVFlGUCIpXQ0KYGBgDQoNCg0KYGBge3J9DQp0b3RhbENhc2VzIDwtIGdyZXAoIlswLTldezR9IiwgY29sbmFtZXMoY2FzZXMpKQ0KY2FzZXNbLCBUb3RhbENhc2VzIDo9IHJvd1N1bXMoY2FzZXNbLCAuLnRvdGFsQ2FzZXNdLCBuYS5ybSA9IFRSVUUpXQ0KYGBgDQoNCg0KYGBge3J9DQpncmFkZXMgPC0gYygiQSsiPTQuMywiQSI9NCwiQS0iPTMuNywiQisiPTMuMywiQiI9MywiQi0iPTIuNywgIkMrIj0yLjMsIkMiPTIsIkMtIj0xLjcsIkQrIj0xLjMsIkQiPTEsIkQtIj0gMC43LCAiRiI9MCkNCg0KZ2Z1bmMgPC0gZnVuY3Rpb24oeCkgew0KICBncmFkZXNbYXMuY2hhcmFjdGVyKHgpXQ0KfQ0KDQpkaXN0YW5jaW5nWywgYygiZ3JhZGVfdG90YWwiLCAiZ3JhZGVfZGlzdGFuY2UiLCAiZ3JhZGVfdmlzaXRhdGlvbiIsICJncmFkZV9lbmNvdW50ZXJzIikgOj0NCiAgICAgICAgICAgICAuKGdmdW5jKGdyYWRlX3RvdGFsKSwgZ2Z1bmMoZ3JhZGVfZGlzdGFuY2UpLCBnZnVuYyhncmFkZV92aXNpdGF0aW9uKSwgZ2Z1bmMoZ3JhZGVfZW5jb3VudGVycykpXQ0KYGBgDQoNCg0KYGBge3J9DQojIHNldCB0aGUgRklQUyBjb2x1bW4gYXMgdGhlIGtleSBmb3IgZmFzdGVyIGRhdGEudGFibGUgb3BlcmF0aW9ucw0KIyB0aW1lIHNlcmllcw0Kc2V0a2V5KGNhc2VzLCBGSVBTKQ0Kc2V0a2V5KHZhY2NpbmVDb3VudHksIEZJUFMpDQpzZXRrZXkoZGlzdGFuY2luZywgRklQUykNCiMgbm90IHRpbWUgc2VyaWVzDQpzZXRrZXkobWFza2luZywgRklQUykNCnNldGtleShwb3AsIEZJUFMpDQpgYGANCg0KDQpgYGB7cn0NCnBpdm90X3dpZGVyKHZhY2NpbmVDb3VudHksIG5hbWVzX2Zyb20gPSBEYXRlLCB2YWx1ZXNfZnJvbSA9ICkNCmBgYA0KDQoNCmBgYHtyfQ0KY3N2IDwtIGNhc2VzW3BvcCxdW2Rpc3RhbmNpbmcsXVttYXNraW5nLF0NCndyaXRlLmNzdihjc3YsICJEYXRhL2Nhc2VzUG9wRGlzdGFuY2luZ01hc2tpbmcuY3N2Iiwgcm93Lm5hbWVzID0gRkFMU0UpDQpvcGVueGxzeDo6d3JpdGUueGxzeChjc3YsICJEYXRhL2Nhc2VzUG9wRGlzdGFuY2luZ01hc2tpbmcueGxzeCIsIG92ZXJ3cml0ZSA9IFRSVUUpDQpkYXRlcyA8LSBncmVwKCJEWzAtOV17NH1fWzAtOV17Mn1fWzAtOV17Mn0iLCBjb2xuYW1lcyhjc3YpKQ0Kb3Blbnhsc3g6OndyaXRlLnhsc3goY3N2WywgIS4uZGF0ZXNdLCAiRGF0YS9ub0RhdGVzLnhsc3giLCBvdmVyd3JpdGUgPSBUUlVFKQ0KY3N2DQpgYGANCg0K